; -*-Scheme-*-

;************************************************************************/
;*                                                                      */
;* Copyright (c) 2003-2009 Vladimir Tsichevski <tsichevski@gmail.com>   */
;*                                                                      */
;* This file is part of bigloo-lib (http://bigloo-lib.sourceforge.net)  */
;*                                                                      */
;* This library is free software; you can redistribute it and/or        */
;* modify it under the terms of the GNU Lesser General Public           */
;* License as published by the Free Software Foundation; either         */
;* version 2 of the License, or (at your option) any later version.     */
;*                                                                      */
;* This library is distributed in the hope that it will be useful,      */
;* but WITHOUT ANY WARRANTY; without even the implied warranty of       */
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    */
;* Lesser General Public License for more details.                      */
;*                                                                      */
;* You should have received a copy of the GNU Lesser General Public     */
;* License along with this library; if not, write to the Free Software  */
;* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 */
;* USA                                                                  */
;*                                                                      */
;************************************************************************/
(module
 encoding
 (import xtree xmlregexp xmlio)
 (library common)
 (include "common.sch")
 (extern
  (include "libxml/encoding.h")
  )
 )


;; xmlCharEncoding:

;; Predefined values for some standard encodings.
;; Libxml does not do beforehand translation on UTF8 and ISOLatinX.
;; It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default.
;
;; Anything else would have to be translated to UTF8 before being
;; given to the parser itself. The BOM for UTF16 and the encoding
;; declaration are looked at and a converter is looked for at that
;; point. If not found the parser stops here as asked by the XML REC. A
;; converter can be registered by the user using xmlRegisterCharEncodingHandler
;; but the current form doesn't allow stateful transcoding (a serious
;; problem agreed !). If iconv has been found it will be used
;; automatically and allow stateful transcoding, the simplest is then
;; to be sure to enable iconv and to provide iconv libs for the encoding
;; support needed.
;
;; Note that the generic "UTF-16" is not a predefined value.  Instead, only
;; the specific UTF-16LE and UTF-16BE are present.

;; typedef enum {
;;     XML_CHAR_ENCODING_ERROR=   -1, /* No char encoding detected */
;;     XML_CHAR_ENCODING_NONE=	0, /* No char encoding detected */
;;     XML_CHAR_ENCODING_UTF8=	1, /* UTF-8 */
;;     XML_CHAR_ENCODING_UTF16LE=	2, /* UTF-16 little endian */
;;     XML_CHAR_ENCODING_UTF16BE=	3, /* UTF-16 big endian */
;;     XML_CHAR_ENCODING_UCS4LE=	4, /* UCS-4 little endian */
;;     XML_CHAR_ENCODING_UCS4BE=	5, /* UCS-4 big endian */
;;     XML_CHAR_ENCODING_EBCDIC=	6, /* EBCDIC uh! */
;;     XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */
;;     XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */
;;     XML_CHAR_ENCODING_UCS2=	9, /* UCS-2 */
;;     XML_CHAR_ENCODING_8859_1=	10,/* ISO-8859-1 ISO Latin 1 */
;;     XML_CHAR_ENCODING_8859_2=	11,/* ISO-8859-2 ISO Latin 2 */
;;     XML_CHAR_ENCODING_8859_3=	12,/* ISO-8859-3 */
;;     XML_CHAR_ENCODING_8859_4=	13,/* ISO-8859-4 */
;;     XML_CHAR_ENCODING_8859_5=	14,/* ISO-8859-5 */
;;     XML_CHAR_ENCODING_8859_6=	15,/* ISO-8859-6 */
;;     XML_CHAR_ENCODING_8859_7=	16,/* ISO-8859-7 */
;;     XML_CHAR_ENCODING_8859_8=	17,/* ISO-8859-8 */
;;     XML_CHAR_ENCODING_8859_9=	18,/* ISO-8859-9 */
;;     XML_CHAR_ENCODING_2022_JP=  19,/* ISO-2022-JP */
;;     XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */
;;     XML_CHAR_ENCODING_EUC_JP=   21,/* EUC-JP */
;;     XML_CHAR_ENCODING_ASCII=    22 /* pure ASCII */
;; } xmlCharEncoding

;; xmlCharEncodingInputFunc:
;; @out:  a pointer to an array of bytes to store the UTF-8 result
;; @outlen:  the length of @out
;; @in:  a pointer to an array of chars in the original encoding
;; @inlen:  the length of @in
;
;; Take a block of chars in the original encoding and try to convert
;; it to an UTF-8 block of chars out.
;
;; Returns the number of bytes written, -1 if lack of space, or -2
;;     if the transcoding failed.
;; The value of @inlen after return is the number of octets consumed
;;     if the return value is positive, else unpredictable.
;; The value of @outlen after return is the number of octets produced.
;
;; typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen,
;;                                          const unsigned char *in, int *inlen)


;; xmlCharEncodingOutputFunc:
;; @out:  a pointer to an array of bytes to store the result
;; @outlen:  the length of @out
;; @in:  a pointer to an array of UTF-8 chars
;; @inlen:  the length of @in
;
;; Take a block of UTF-8 chars in and try to convert it to another
;; encoding.
;; Note: a first call designed to produce heading info is called with
;; in = NULL. If stateful this should also initialize the encoder state.
;
;; Returns the number of bytes written, -1 if lack of space, or -2
;;     if the transcoding failed.
;; The value of @inlen after return is the number of octets consumed
;;     if the return value is positive, else unpredictable.
;; The value of @outlen after return is the number of octets produced.

;; typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,
;;                                           const unsigned char *in, int *inlen)


;; Block defining the handlers for non UTF-8 encodings.
;; If iconv is supported, there are two extra fields.

;; Object declaration for the C type xmlCharEncodingHandler from
;; "libxml/encoding.h".  Only the `name' field (a string) is declared.
;; The remaining members of the C structure are kept below, commented
;; out, as a reference copy of the C declaration; they are not declared
;; as fields of this object.
;; NOTE(review): the other definitions in this file use the type name
;; xml-char-encoding-handler, presumably generated from this
;; declaration -- confirm against the define-object macro.
(define-object xmlCharEncodingHandler ()
  (fields
   (string name)
;;     xmlCharEncodingInputFunc   input
;;     xmlCharEncodingOutputFunc  output
;; #ifdef LIBXML_ICONV_ENABLED
;;     iconv_t                    iconv_in
;;     iconv_t                    iconv_out
;; #endif /* LIBXML_ICONV_ENABLED */
   ))

;; Interfaces for encoding handlers.

;; Binding for the C function xmlInitCharEncodingHandlers: takes no
;; arguments and has a void result.  See the libxml encoding
;; documentation for what the C function does.
(define-func xmlInitCharEncodingHandlers  void	())
;; Binding for the C function xmlCleanupCharEncodingHandlers: takes no
;; arguments and has a void result.  See the libxml encoding
;; documentation for what the C function does.
(define-func xmlCleanupCharEncodingHandlers  void ())
;; Binding for the C function xmlRegisterCharEncodingHandler: takes one
;; xmlCharEncodingHandler argument (`handler') and has a void result.
(define-func xmlRegisterCharEncodingHandler  void ((xmlCharEncodingHandler handler)))
; (define-func xmlGetCharEncodingHandler xmlCharEncodingHandler ((xmlCharEncoding enc)))

;; Look up a character encoding handler by NAME.
;;
;; Calls the C function xmlFindCharEncodingHandler with NAME cast to
;; char*.  Returns the resulting handler object when the C pointer is
;; not NULL, and #f otherwise.
(define-export (xml-find-char-encoding-handler name::string)
  (let* ((encoding-name::string name)
         (found::xml-char-encoding-handler
          (pragma::xml-char-encoding-handler
           "xmlFindCharEncodingHandler((char*)$1)" encoding-name)))
    ;; The NULL test is done on the C side; a NULL result is reported
    ;; to the caller as #f.
    (if (pragma::bool "$1 != NULL" found)
        found
        #f)))

; (define-func xmlNewCharEncodingHandler xmlCharEncodingHandler
;   ((string name "char*")
;    (xmlCharEncodingInputFunc input)
;    (xmlCharEncodingOutputFunc output)))

;; Interfaces for encoding names and aliases.

;; Binding for the C function xmlAddEncodingAlias: takes two string
;; arguments, `name' and `alias', and has an int result.
;; NOTE(review): the trailing "char*" in each argument spec is
;; presumably the C type the argument is cast to -- confirm against
;; the define-func macro.
(define-func xmlAddEncodingAlias int ((string name "char*")
				      (string alias "char*")))
;; Binding for the C function xmlDelEncodingAlias: takes one string
;; argument, `alias', and has an int result.
(define-func xmlDelEncodingAlias  int ((string alias "char*")))

;; Query the encoding alias table for ALIAS.
;;
;; Calls the C function xmlGetEncodingAlias with ALIAS cast to char*
;; and returns its result, cast to char*, as a value of type string.
;; NOTE(review): the C result is returned without a NULL check, unlike
;; xml-find-char-encoding-handler -- confirm what xmlGetEncodingAlias
;; yields for an unknown alias and how callers are expected to cope.
(define-export (xml-get-encoding-alias::string alias::string)
  (let ((alias-name::string alias))
    (pragma::string "(char*)xmlGetEncodingAlias((char*)$1)" alias-name)))

;; Binding for the C function xmlCleanupEncodingAliases: takes no
;; arguments and has a void result.
(define-func xmlCleanupEncodingAliases  void ())
; (define-func xmlParseCharEncoding  xmlCharEncoding ((string name "char*")))
; (define-func xmlGetCharEncodingName string ((xmlCharEncoding enc)))

;; Interfaces directly used by the parsers.

; (define-func xmlDetectCharEncoding  xmlCharEncoding ((string in "char*")
; 						     (int len)))

;; Binding for the C function xmlCharEncOutFunc: takes an
;; xmlCharEncodingHandler (`handler') and two xmlBuffer arguments
;; (`out', then `in'), and has an int result.
;; NOTE(review): xmlBuffer is not declared in this file; presumably it
;; comes from one of the imported modules -- confirm.
(define-func xmlCharEncOutFunc int
  ((xmlCharEncodingHandler handler)
   (xmlBuffer out)
   (xmlBuffer in)))

;; (define-func xmlCharEncInFunc  int ((xmlCharEncodingHandler *handler)
;; 				    (xmlBuffer out)
;; 				    (xmlBuffer in)))

;; (define-func xmlCharEncFirstLine  int ((xmlCharEncodingHandler *handler)
;; 				       (xmlBuffer out)
;; 				       (xmlBuffer in)))

;; Binding for the C function xmlCharEncCloseFunc: takes one
;; xmlCharEncodingHandler argument (`handler') and has an int result.
(define-func xmlCharEncCloseFunc int ((xmlCharEncodingHandler handler)))

;; Export a few useful functions

;;(define-func UTF8Toisolat1  int ((unsigned char *out,))
;; 					 int *outlen,
;; 					 const unsigned char *in,
;; 					 int *inlen)

;;(define-func isolat1ToUTF8  int ((unsigned char *out,))
;; 					 int *outlen,
;; 					 const unsigned char *in,
;; 					 int *inlen)
